/* 
    Purpose: Using the 1990 Census, this file takes cleaned variables 
             from 6c and calculates average predicted mother income 
             (i.e., "income scores") at ONLY the occupation x race x south
             level.

    Note: Will create templates/income scores at occ and occ x race levels 
          solely to impute income for missing occ x race x south cells.

    Creates: incomescores_mothers1990_byocc_byr_bys.dta
*/
* Drop any dataset currently in memory before starting.
clear
* Disable the --more-- pause so the script runs without manual intervention.
set more off
* All relative paths below (./code/..., ./output/...) resolve against this directory.
* NOTE(review): $Mydirectory1 is not defined in this file -- presumably set by a
* master do-file or profile before this script runs; confirm.
cd "$Mydirectory1/1_DataSources/CensusData/"
    
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

*******************
*** TEMPLATES
*******************

/* Note: Census microdata from 6c does not have 
         all 28 coarsened occupations, so will use 
         the template below that does have all occupations. 
*/  
    * Load the complete list of coarsened occupations; every template derives from it.
    use ./code/OtherCensus_RawData/motheroccej_template_occs.dta, clear 

* Reserve the three temporary files up front.
tempfile template_byocc template_byocc_byr template_byocc_byr_bys

* Build the templates cumulatively inside a single preserve/restore so that the
* plain occupation list is back in memory once all three have been written.
preserve

    * Template 1: occupation
    save `template_byocc'

    * Template 2: occupation x race
    * (expand's indicator is 0 for originals and 1 for copies; shift to race = 1/2)
    expand 2, generate(race)
    replace race = race + 1
    save `template_byocc_byr'

    * Template 3: occupation x race x south
    * (south_merge keeps expand's 0/1 coding)
    expand 2, generate(south_merge)
    save `template_byocc_byr_bys'

restore

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*
 
    * Income variables that are averaged within each cell.
    global income_measures "inctot faminc HHinc"

    * Cleaned 1990 Census extract; `number' is a constant 1 per record so that
    * its raw (unweighted) sum gives the number of observations in a cell.
    use ./output/Census1990_mothers_ages30to50.dta, clear   
    generate number = 1

*******************
*** COLLAPSE 
*******************

* Grouping variables that define a cell at each level of aggregation.
local cell_byocc         "motheroccej"
local cell_byocc_byr     "motheroccej race"
local cell_byocc_byr_bys "motheroccej race south_merge"

foreach x in byocc byocc_byr byocc_byr_bys {

    local cell "`cell_`x''"

    preserve

        * perwt-weighted mean of each income measure plus the raw record count.
        collapse (rawsum) number (mean) $income_measures [aw=perwt], by(`cell')

        foreach v of global income_measures {
            local score avg_`v'_1990_`x'
            rename `v' `score'
            label variable `score' "Coarse (mother) income score, average, 1990 using `v'"
        }

        tempfile cellscores
        save `cellscores'

        * Attach the scores to the full template so that cells absent from the
        * microdata still appear (with missing scores and a zero count).
        use `template_`x''
        merge 1:1 `cell' using `cellscores', nogenerate

        replace number = 0 if number==.
        label variable number "Number of obs in cell"
        rename number number_1990obs_`x'

        * Kept for the imputation step further down.
        tempfile incomescores_`x'
        save `incomescores_`x''

    restore

}

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************
*** IMPUTATIONS
****************

/*Note: Will impute any missing occ x race x south cells 
        with average value of less granular level. */

    * Suffixes of the three aggregation levels, ordered coarsest to finest.
    local name1 "byocc"
    local name2 "byocc_byr"
    local name3 "byocc_byr_bys"

    * Start from the finest level (the most cells) and attach the coarser
    * scores to every row that shares the coarser key.
    use `incomescores_byocc_byr_bys', clear
    merge m:1 motheroccej race using `incomescores_byocc_byr', nogenerate
    merge m:1 motheroccej using `incomescores_byocc', nogenerate

    * Fill each level from the level directly above it, coarsest first:
    *   pass 2: occ x race          <- occ
    *   pass 3: occ x race x south  <- occ x race (already filled in pass 2)
    forvalues fine = 2/3 {
        local coarse = `fine' - 1
        foreach c of global income_measures {
            local target   avg_`c'_1990_`name`fine''
            local fallback avg_`c'_1990_`name`coarse''
            * Report how many cells need imputing before filling them.
            count if `target'==.
            replace `target' = `fallback' if `target'==.
        }
    }

***********
* SAVE
***********

    * Write the final occ x race x south income-score file.
    * Only the finest level is saved; the loop form is kept so that further
    * levels could be added to the list.
    foreach x in byocc_byr_bys {
    
        if "`x'"=="byocc_byr_bys" local cell "motheroccej race south_merge"

        preserve 
        
        * One row per cell, keeping only this level's count and scores.
        bysort `cell': keep if _n==1
        keep number_1990obs_`x' `cell' avg*`x'
        
        * Show which occupations still lack a score after the imputations above.
        tab motheroccej if avg_HHinc_1990_`x'==.

    ****************************************************    
    /* Last step: impute occ x race x south income 
                  when average income is missing 
                  at all levels                       */
    **************************************************** 
        /* Give dentists (occ=5) average income of 
           professionals (occ=17)  in the same race x region */           
        foreach y in 5 {
            foreach race in 1 2 {
                foreach reg in 0 1 {            
                    foreach c in $income_measures {
                        summarize avg_`c'_1990_`x' if motheroccej==17 & race==`race' & south_merge==`reg'
                        * Fill only cells that are still missing, as the note above
                        * states; a score computed from data is never overwritten.
                        * r(mean) is referenced directly rather than through macro
                        * expansion: this keeps full precision, and if the
                        * professional cell is empty it yields a missing value
                        * (caught by the assert below) instead of a syntax error.
                        replace avg_`c'_1990_`x' = r(mean) if motheroccej==`y' & race==`race' & south_merge==`reg' & missing(avg_`c'_1990_`x')
                    }                 
                }
            }
        }
        
        * Every cell must have a household-income score before saving.
        assert avg_HHinc_1990_`x'!=.
        
        compress
        save ./output/incomescores_mothers1990_`x'.dta, replace
        
        restore
        
    }
